# RYM Data Analysis
# A recreation of several plots I made in Excel

library(ggplot2)
library(plotly)
theme_set(theme_minimal())


# READING DATA
# The CSV is read without a header row, and every row that contains an NA
# is discarded straight away.
data <- na.omit(
    read.csv(file = "D:\\Downloads\\rymList (3).csv", header = FALSE)
)

# Label the columns by hand, in file order.
colnames(data) <- c("Artist", "Album", "Year", "Date", "Rating")

# Parse the Date column from day/month/year strings into Date objects.
data[["Date"]] <- as.Date(data[["Date"]], format = "%d/%m/%Y")

# Turn rating labels of the form "x.x0 stars" into numeric scores.
#
# ratingStr: character vector of rating labels.
# Returns a double vector holding twice the number parsed from the first
# three characters of each label (e.g. "4.50 stars" -> 9). Labels whose
# first three characters are not a number yield NA.
convertRating <- function(ratingStr) {
    numericPrefix <- substr(ratingStr, start = 1, stop = 3) # e.g. "4.5"
    2 * as.numeric(numericPrefix)
}

# Convert the whole rating column in a single vectorised call.
# convertRating() is built only from vectorised operations, so it accepts
# the full column directly; an element-wise sapply() is unnecessary, would
# attach the raw label strings as names, and would change its return type
# on empty input.
data$Rating <- convertRating(data$Rating)

# remove rows with NA rating (not yet rated at time of scraping)
data <- data[!is.na(data$Rating), ]
# plot(data$Date, data$Year,
#     col = "#4472C4",
#     pch = 19,
#     main = "Release Years of Rated Albums vs. Time",
#     xlab = "Time",
#     ylab = "Release Years of Rated Albums"
# )

# plot(data$Date, data$Rating,
#     ylim = c(1, 10),
#     col = "#4472C4",
#     pch = 19,
#     main = "Albums Ratings vs. Time",
#     xlab = "Time",
#     ylab = "Albums Ratings"
# )
# Scatter plot: the Date column on the x-axis against the album's Year on
# the y-axis, built up one layer at a time.
p1 <- ggplot(data = data, mapping = aes(x = Date, y = Year))
p1 <- p1 + geom_point(colour = "#4472C4")
p1 <- p1 + labs(
    title = "Date Rated / Album Year",
    x = "Date Rated",
    y = "Year of Release"
)
# Render the interactive (plotly) version of the plot.
ggplotly(p1)
# Scatter plot of Rating against the date it was given.
# The `text` aesthetic carries the album and artist so that ggplotly() can
# show them in the hover tooltip; `group = 1` is a dummy grouping aesthetic
# placing all rows in a single group.
# ggplot() takes exactly one mapping, so only a single aes() call is passed.
p2 <- ggplot(data,
        aes(
            x = Date,
            y = Rating,
            group = 1,
            text = paste(
                "Album: ", Album,
                "</br>Artist: ", Artist
            )
        )
    ) +
    geom_point(color = "#4472C4") +
    labs(
        title = "Date Rated / Album Rating",
        x = "Date Rated",
        y = "Rating"
    )

ggplotly(p2)

# Time Series Plots

# Reverse the row order and keep the result in a new data frame.
# rev(seq_len(n)) is used instead of n:1 so that an empty `data` gives an
# empty `df` (0:1 would index a non-existent row and produce a row of NAs).
df <- data[rev(seq_len(nrow(data))), ]

# renumber the row names 1..n so they follow the reversed order
row.names(df) <- seq_len(nrow(df))

# give it its own name so it appears that way in interactive hover
index <- as.numeric(row.names(df))
# plot list index against time
# dummy aesthetic (group = 1): https://stackoverflow.com/a/43763132
# `y = index` is not a column of df; it resolves to the `index` vector
# defined just before this plot. The `text` aesthetic supplies the album
# and artist for the hover tooltip requested in ggplotly() below.
# ggplot() takes exactly one mapping, so only a single aes() call is passed.
tplot <- ggplot(
    data = df,
    mapping = aes(
        x = Date,
        y = index,
        color = Rating,
        group = 1,
        text = paste(
            "Album: ", Album,
            "</br>Artist: ", Artist
        )
    )
) +
    # The constant line colour overrides the Rating colour mapping for the
    # line only; the points below are still coloured by Rating.
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept so older installations keep working.
    geom_line(color = "#00AFBB", size = 1, linetype = "dashed") +
    geom_point(size = 0.5) +
    labs(
        title = "Total number of albums rated / Time",
        x = "Time",
        y = "Total number of albums rated"
    )

ggplotly(
    p = tplot,
    tooltip = c("x", "y", "text")
)

# Histograms

# Histogram of the Rating column: bins one unit wide, with an x-axis tick at
# every integer from 1 to 10.
p <- ggplot(data = df, mapping = aes(x = Rating))
p <- p + geom_histogram(binwidth = 1, colour = "darkblue", fill = "lightblue")
p <- p + scale_x_continuous(breaks = seq(from = 1, to = 10, by = 1))

ggplotly(p)
# Histogram of the Year column with one bin per year. X-axis ticks are
# placed every 10 years, starting from the smallest Year rounded down to a
# multiple of 10 and running up to the largest Year.
rankplot <- ggplot(data = df, mapping = aes(x = Year))
rankplot <- rankplot +
    geom_histogram(binwidth = 1, colour = "darkblue", fill = "lightblue")
rankplot <- rankplot +
    scale_x_continuous(
        breaks = seq(
            from = floor(min(df[["Year"]]) / 10) * 10,
            to = max(df[["Year"]]),
            by = 10
        )
    )
# coord_flip() is intentionally left disabled.

ggplotly(rankplot)

# Scatterplot

# Build a per-year summary: one row per Year, with the number of df rows
# for that year stored in `count`.
df2 <- aggregate(
    cbind(count = Album) ~ Year,
    data = df,
    FUN = NROW
)

# Remember each row's position in the aggregate() output before re-sorting.
df2[["Index"]] <- seq_len(nrow(df2))

# Reorder the rows by ascending count.
df2 <- df2[order(df2[["count"]]), ]

# Reset the row names to 1..n in the sorted order.
row.names(df2) <- seq_len(nrow(df2))

# Position of each row in the count-sorted table, as a numeric vector.
ranking <- as.numeric(row.names(df2))

# Per-year scatter plot with two point layers sharing the Year x-axis:
#   "#4472C4" points - the Index column of df2
#   "#ED7D31" points - the `ranking` vector (not a df2 column; it is looked
#                      up in the surrounding environment)
# X-axis ticks fall every 10 years, starting from the smallest df Year
# rounded down to a multiple of 10.
yearplot <- ggplot(data = df2, mapping = aes(x = Year))
yearplot <- yearplot + geom_point(aes(y = Index), colour = "#4472C4")
yearplot <- yearplot + geom_point(aes(y = ranking), colour = "#ED7D31")
yearplot <- yearplot +
    scale_x_continuous(
        breaks = seq(
            from = floor(min(df[["Year"]]) / 10) * 10,
            to = max(df[["Year"]]),
            by = 10
        )
    )

ggplotly(yearplot)